##########################################
## "Voters get what they want"          ##
##########################################
## Heinrich, Kobayashi, Long            ##
##########################################

##          
## Descriptives of September survey experiment
## Script 13
## June 29, 2017


## Read the survey-experiment export and derive the analysis variables
######################################################################
## The export is pipe-delimited and is read with quoting switched off.
data <- read.csv2(
  file = "data/MTurk data from September 2015.txt",
  sep = "|",
  quote = ""
)

## Variables that are carried over unchanged under a new name
data$Failures <- data$p4_screener_failures
data$LR <- data$p4_politics

## p8_time goes through character first, so that a factor column yields its
## printed values rather than its internal level codes
data$Time <- as.numeric(as.character(data$p8_time))

## Derived covariates
data$Age <- 2015 - data$user_birthyr              # 2015 minus birth year
data$HiEdu <- as.numeric(data$p4_education >= 5)  # 1 if education code >= 5, else 0
data$Gender <- as.numeric(data$user_gender != "f")  # 0 for "f", 1 otherwise

## Keep complete cases, then only the analysis columns.
## NOTE(review): na.omit() runs while all raw columns are still present, so a
## missing value in any column of the export removes the respondent -- confirm
## that this is intended.
data <- na.omit(data)[, c("Age", "Gender", "LR", "HiEdu", "Time", "Failures")]


## Drop respondents with out-of-range times or too many screener failures
#########################################################################
## Original note: 1,414 observations; 1,386 retained
n <- nrow(data)

## A respondent is kept only if Time lies strictly between 5 and 120 and
## there are at most 2 screener failures; which() also discards rows for
## which the condition evaluates to NA.
data <- data[which(data$Time > 5 & data$Time < 120 & data$Failures <= 2), ]

## Number of respondents removed by the two criteria above
n_dropped <- n - nrow(data)



## CCES data for post-stratification; build the MTurk vs. CCES comparison
#########################################################################
## The code below expects load() to provide a data frame `cces` with the
## columns Age, Gender, LR, HiEdu and weight.
## NOTE(review): weightedSd() is not defined in this script; presumably it
## comes from the matrixStats package attached elsewhere -- confirm.
load("output/data_preppedCCES.Rdata")

## Row labels shared by both sources (one row per variable and source)
desc_vars <- c("Age", "Proportion male", "Left-Right", "Proportion high education")

## Sample sizes for the standard errors. Each source must use its own size:
## dividing the CCES standard deviations by the MTurk sample size gives
## intervals that do not describe the CCES estimates. Because the CCES means
## are weighted, Kish's effective sample size (sum w)^2 / sum(w^2) is used;
## it equals nrow(cces) when all weights are equal.
n_mturk <- nrow(data)
n_cces <- sum(cces$weight)^2 / sum(cces$weight^2)

desc <- rbind(
  data.frame(
    Which = "MTurk",
    Variable = desc_vars,
    Mean = c(mean(data$Age), mean(data$Gender), mean(data$LR),
             mean(data$HiEdu)),
    SD = c(sd(data$Age), sd(data$Gender), sd(data$LR), sd(data$HiEdu)),
    N = n_mturk
  ),
  data.frame(
    Which = "CCES",
    Variable = desc_vars,
    Mean = c(weighted.mean(cces$Age, cces$weight),
             weighted.mean(cces$Gender, cces$weight),
             weighted.mean(cces$LR, cces$weight),
             weighted.mean(cces$HiEdu, cces$weight)),
    ## NOTE(review): the SD of the two CCES proportions is fixed at 0, as in
    ## the original script, so those rows are drawn without an interval --
    ## confirm that this is intended.
    SD = c(weightedSd(cces$Age, cces$weight), 0,
           weightedSd(cces$LR, cces$weight), 0),
    N = n_cces
  )
)

## Normal-approximation 95% interval around each mean
desc$Hi <- desc$Mean + 1.96 * desc$SD / sqrt(desc$N)
desc$Lo <- desc$Mean - 1.96 * desc$SD / sqrt(desc$N)

## One panel per variable: point estimate and interval for each data source
panel_vars <- as.character(unique(desc$Variable))
proportion_vars <- c("Proportion male", "Proportion high education")

## Sized from the data instead of a hard-coded 4
g_list <- vector("list", length(panel_vars))
for (i in seq_along(panel_vars)) {
  panel_var <- panel_vars[i]

  panel <- ggplot(data = desc[desc$Variable == panel_var, ],
                  aes(x = Which, y = Mean, ymin = Lo, ymax = Hi,
                      group = Which, colour = Which)) +
    ## `size` is kept for the line width so that older ggplot2 versions keep
    ## working (ggplot2 >= 3.4.0 prefers `linewidth` here)
    geom_linerange(position = position_dodge(width = .3), size = 1.4) +
    geom_point(position = position_dodge(width = .3), size = 3) +
    xlab("") +
    ylab(panel_var) +
    ## guide = "none" replaces the deprecated guide = FALSE
    scale_colour_grey(guide = "none")

  ## The two proportion panels share a common value range
  if (panel_var %in% proportion_vars) {
    panel <- panel + scale_y_continuous(limits = c(0.20, 0.69))
  }

  ## theme_bw() is a complete theme and discards theme() settings added
  ## before it, so it has to come first and the tweaks afterwards
  panel <- panel +
    coord_flip() +
    ggtitle(panel_var) +
    theme_bw() +
    theme(legend.position = "none",
          axis.ticks = element_blank(),
          plot.title = element_text(size = rel(1.1)))

  ## Only the first panel shows the source labels; in the other panels they
  ## are made invisible (tiny and white) instead of being removed
  if (i > 1) {
    panel <- panel +
      theme(axis.text.y = element_text(size = rel(.0001), colour = "white"))
  }

  g_list[[i]] <- panel
}

## Draw the panels in g_list in four columns into the appendix figure.
## NOTE(review): multiplot() is not defined in this script; it has to be
## provided by a package or script loaded elsewhere.
pdf(file="output/figures/A2-September-Descriptives.pdf", width=10, height=4.6)
## Close the device even if drawing fails; otherwise the PDF device would stay
## open and capture plots produced later in the session.
invisible(tryCatch(
  multiplot(plotlist=g_list, cols=4),
  finally = dev.off()
))

